K-Means & DBSCAN¶

  • Bruno Morgado (301154898)
In [1]:
# Necessary Imports
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
from collections import defaultdict
import sklearn
from sklearn.datasets import fetch_olivetti_faces
from sklearn.metrics import accuracy_score, silhouette_score, classification_report
from sklearn.svm import SVC
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import train_test_split, cross_val_score
import warnings
In [2]:
# Remove annoying alerts 
warnings.filterwarnings('ignore')
In [3]:
# Fetch Olivetti dataset from Sklearn: 400 grayscale 64x64 face images,
# 40 subjects with 10 images each; fixed seed makes the shuffle reproducible
dataset = fetch_olivetti_faces(shuffle=True, random_state=98)
In [4]:
# Inspect the container type returned by the fetcher (a sklearn Bunch)
type(dataset)
Out[4]:
sklearn.utils._bunch.Bunch
In [5]:
# Storing features, target variable, and 2d features matrix as images
X = dataset.data          # flattened pixels, shape (400, 4096)
y = dataset.target        # subject id per image (0-39)
images = dataset.images   # same pixels kept 2-D, shape (400, 64, 64)
In [6]:
# images is a plain NumPy array
type(images)
Out[6]:
numpy.ndarray
In [7]:
# 400 images of 64x64 pixels
images.shape
Out[7]:
(400, 64, 64)
In [8]:
# Bundle X and y into a single dataframe: one pixel_<n> column per feature,
# plus the subject id as a trailing `target` column
pixel_columns = [f"pixel_{idx + 1}" for idx in range(X.shape[1])]

df = pd.DataFrame(X, columns=pixel_columns)

df["target"] = y
In [9]:
# Preview the first five rows (4096 pixel columns + target)
df.head()
Out[9]:
pixel_1 pixel_2 pixel_3 pixel_4 pixel_5 pixel_6 pixel_7 pixel_8 pixel_9 pixel_10 ... pixel_4088 pixel_4089 pixel_4090 pixel_4091 pixel_4092 pixel_4093 pixel_4094 pixel_4095 pixel_4096 target
0 0.086777 0.099174 0.115702 0.128099 0.214876 0.359504 0.512397 0.603306 0.652893 0.702479 ... 0.487603 0.479339 0.466942 0.450413 0.454545 0.454545 0.210744 0.318182 0.491736 37
1 0.260331 0.351240 0.438017 0.553719 0.648760 0.694215 0.747934 0.789256 0.809917 0.830579 ... 0.541322 0.516529 0.520661 0.326446 0.074380 0.223140 0.256198 0.309917 0.289256 7
2 0.103306 0.219008 0.177686 0.219008 0.392562 0.574380 0.669421 0.681818 0.710744 0.731405 ... 0.367769 0.433884 0.421488 0.425620 0.429752 0.429752 0.438017 0.475207 0.276859 3
3 0.669421 0.636364 0.648760 0.685950 0.710744 0.760331 0.768595 0.805785 0.793388 0.809917 ... 0.223140 0.219008 0.115702 0.090909 0.090909 0.095041 0.086777 0.082645 0.074380 13
4 0.425620 0.475207 0.458678 0.500000 0.524793 0.524793 0.541322 0.557851 0.586777 0.553719 ... 0.491736 0.690083 0.661157 0.669421 0.524793 0.433884 0.491736 0.483471 0.483471 33

5 rows × 4097 columns

In [10]:
# Summary: 400 rows, float32 pixel columns plus one int target column
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Columns: 4097 entries, pixel_1 to target
dtypes: float32(4096), int32(1)
memory usage: 6.3 MB
In [11]:
# Define a function to plot (default = 40) sample images
def plot_gallery(images, titles, h, w, n_row=5, n_col=8):
    """Plot a gallery of portraits in an n_row x n_col grid.

    Parameters
    ----------
    images : sequence of images (each reshapeable to (h, w))
    titles : sequence of per-image titles, aligned with `images`
    h, w : int
        Height and width each image is reshaped to before display.
    n_row, n_col : int
        Grid dimensions (default 5 x 8 = 40 images).
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    # Guard against fewer images/titles than grid slots (the original
    # raised IndexError when len(images) < n_row * n_col)
    n_plots = min(n_row * n_col, len(images), len(titles))
    for i in range(n_plots):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        # Hide tick marks -- only the pixels matter
        plt.xticks(())
        plt.yticks(())
In [12]:
# Show the first 40 faces, titled with their subject ids
plot_gallery(images, y, h=64, w=64)
plt.show()
In [13]:
# Split dataset into train, validation, and test sets with stratification:
# 70% train, then the remaining 30% halved into validation and test.
# stratify=y keeps every subject proportionally represented in each split.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=98, stratify=y)

X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=98, stratify=y_temp)

print(f"Training set size: {len(y_train)}")
print(f"Validation set size: {len(y_valid)}")
print(f"Test set size: {len(y_test)}")
Training set size: 280
Validation set size: 60
Test set size: 60
In [14]:
# Instantiate a Support Vector Classifier with an RBF kernel
svm_clf = SVC(kernel='rbf', random_state=98)
In [15]:
# Get 5-fold cross validation scores on the raw-pixel training set
# NOTE: `scores` is rebound to a plain list in a later cell (silhouette loop)
k = 5
scores = cross_val_score(svm_clf, X_train, y_train, cv=k, scoring='accuracy')

print(f"Cross-validation scores (k={k}):", scores)
print("Average cross-validation score:", scores.mean())
Cross-validation scores (k=5): [0.85714286 0.92857143 0.92857143 0.83928571 0.85714286]
Average cross-validation score: 0.8821428571428571
In [16]:
# Train the SVC classifier on the full 4096-pixel training set
svm_clf.fit(X_train, y_train)
Out[16]:
SVC(random_state=98)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SVC(random_state=98)
In [17]:
# Make predictions and print validation scores on the validation set
y_pred_valid = svm_clf.predict(X_valid)
accuracy = accuracy_score(y_valid, y_pred_valid)
# f-prefix removed: the string has no placeholders (lint F541); output unchanged
print("Validation accuracy with kernel= rbf:", accuracy)
Validation accuracy with kernel= rbf: 0.9333333333333333
In [18]:
# Make predictions on the held-out test set
y_pred = svm_clf.predict(X_test)
In [19]:
# Print the classification report (per-subject precision/recall/F1)
print('\t\tClassification Report - SVC\n\n', classification_report(y_test, y_pred))
		Classification Report - SVC

               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2
           2       0.00      0.00      0.00         2
           3       1.00      0.50      0.67         2
           4       1.00      0.50      0.67         2
           5       1.00      1.00      1.00         1
           6       1.00      1.00      1.00         2
           7       0.67      1.00      0.80         2
           8       0.50      1.00      0.67         1
           9       1.00      0.50      0.67         2
          10       1.00      1.00      1.00         1
          11       1.00      1.00      1.00         1
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         1
          14       1.00      1.00      1.00         1
          15       1.00      1.00      1.00         2
          16       1.00      1.00      1.00         1
          17       1.00      1.00      1.00         1
          18       1.00      1.00      1.00         1
          19       1.00      1.00      1.00         1
          20       1.00      1.00      1.00         1
          21       1.00      1.00      1.00         2
          22       0.25      1.00      0.40         1
          23       1.00      1.00      1.00         2
          24       1.00      1.00      1.00         1
          25       1.00      1.00      1.00         2
          26       1.00      1.00      1.00         2
          27       1.00      1.00      1.00         2
          28       1.00      1.00      1.00         1
          29       1.00      1.00      1.00         2
          30       1.00      1.00      1.00         2
          31       1.00      1.00      1.00         2
          32       1.00      1.00      1.00         1
          33       1.00      1.00      1.00         2
          34       1.00      1.00      1.00         2
          35       1.00      1.00      1.00         2
          36       1.00      1.00      1.00         1
          37       0.00      0.00      0.00         1
          38       1.00      1.00      1.00         1
          39       0.33      0.50      0.40         2

    accuracy                           0.88        60
   macro avg       0.89      0.90      0.88        60
weighted avg       0.90      0.88      0.87        60

In [20]:
# Explore the target variables further: 40 classes with 10 images each (balanced)
np.unique(y, return_counts=True)
Out[20]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39]),
 array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10], dtype=int64))
In [53]:
# Accumulator for silhouette scores.
# NOTE: this rebinds `scores`, previously the cross-validation result array.
scores = []
# Candidate cluster counts: k = 2 .. 199
range_clusters = range(2, 200)
In [54]:
# Fit kmeans with a range of clusters and make predictions
for n_clusters in range_clusters:
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++", n_init='auto', random_state=98)
    # fit_predict trains the model and returns the training labels in one call:
    # equivalent to fit(X_train) followed by predict(X_train), without the
    # redundant second nearest-centroid pass
    preds = kmeans.fit_predict(X_train)
    score = silhouette_score(X_train, preds)
    scores.append(score)
In [55]:
# Report the silhouette score obtained for each candidate cluster count
for idx, sil_score in enumerate(scores):
    print(f'Index {idx} : {sil_score}')
Index 0 : 0.15040580928325653
Index 1 : 0.12763933837413788
Index 2 : 0.10531938076019287
Index 3 : 0.10427547246217728
Index 4 : 0.10007187724113464
Index 5 : 0.08305481821298599
Index 6 : 0.08263854682445526
Index 7 : 0.07518859207630157
Index 8 : 0.073173888027668
Index 9 : 0.08468463271856308
Index 10 : 0.09262099862098694
Index 11 : 0.09107372164726257
Index 12 : 0.09361782670021057
Index 13 : 0.09615043550729752
Index 14 : 0.08599311113357544
Index 15 : 0.08935511112213135
Index 16 : 0.09138374030590057
Index 17 : 0.10017868131399155
Index 18 : 0.10079923272132874
Index 19 : 0.10017043352127075
Index 20 : 0.10201352834701538
Index 21 : 0.10470715910196304
Index 22 : 0.10446251928806305
Index 23 : 0.10010135918855667
Index 24 : 0.10107807070016861
Index 25 : 0.10394089668989182
Index 26 : 0.10997769236564636
Index 27 : 0.11223439127206802
Index 28 : 0.11691679060459137
Index 29 : 0.11985310167074203
Index 30 : 0.12723535299301147
Index 31 : 0.13064169883728027
Index 32 : 0.13365775346755981
Index 33 : 0.13824263215065002
Index 34 : 0.14249515533447266
Index 35 : 0.1481761634349823
Index 36 : 0.14357256889343262
Index 37 : 0.1436309516429901
Index 38 : 0.1397218406200409
Index 39 : 0.1467689722776413
Index 40 : 0.14565765857696533
Index 41 : 0.1496649533510208
Index 42 : 0.1529509425163269
Index 43 : 0.1521298885345459
Index 44 : 0.15335075557231903
Index 45 : 0.14976109564304352
Index 46 : 0.15099528431892395
Index 47 : 0.15427374839782715
Index 48 : 0.15273474156856537
Index 49 : 0.15652386844158173
Index 50 : 0.15424807369709015
Index 51 : 0.15666350722312927
Index 52 : 0.15177112817764282
Index 53 : 0.16995060443878174
Index 54 : 0.1676616668701172
Index 55 : 0.1686926633119583
Index 56 : 0.16975824534893036
Index 57 : 0.1662697046995163
Index 58 : 0.1699931025505066
Index 59 : 0.1718224734067917
Index 60 : 0.17366155982017517
Index 61 : 0.17582568526268005
Index 62 : 0.1759594827890396
Index 63 : 0.17405714094638824
Index 64 : 0.17558467388153076
Index 65 : 0.17994172871112823
Index 66 : 0.18225790560245514
Index 67 : 0.18075674772262573
Index 68 : 0.17639149725437164
Index 69 : 0.17577309906482697
Index 70 : 0.1748947650194168
Index 71 : 0.1761694699525833
Index 72 : 0.17674699425697327
Index 73 : 0.17763976752758026
Index 74 : 0.17831090092658997
Index 75 : 0.17750287055969238
Index 76 : 0.18052951991558075
Index 77 : 0.18284288048744202
Index 78 : 0.18246963620185852
Index 79 : 0.18181942403316498
Index 80 : 0.18212315440177917
Index 81 : 0.1808498352766037
Index 82 : 0.17763452231884003
Index 83 : 0.1776471585035324
Index 84 : 0.17787683010101318
Index 85 : 0.17529632151126862
Index 86 : 0.17517326772212982
Index 87 : 0.17716245353221893
Index 88 : 0.17552055418491364
Index 89 : 0.17928534746170044
Index 90 : 0.17951925098896027
Index 91 : 0.1787957400083542
Index 92 : 0.17868363857269287
Index 93 : 0.1791733354330063
Index 94 : 0.17808859050273895
Index 95 : 0.17738115787506104
Index 96 : 0.177434504032135
Index 97 : 0.17786002159118652
Index 98 : 0.18246525526046753
Index 99 : 0.1808396726846695
Index 100 : 0.17735245823860168
Index 101 : 0.1762050837278366
Index 102 : 0.17509391903877258
Index 103 : 0.17494116723537445
Index 104 : 0.17638462781906128
Index 105 : 0.17514196038246155
Index 106 : 0.1750955581665039
Index 107 : 0.16826532781124115
Index 108 : 0.16866986453533173
Index 109 : 0.16804738342761993
Index 110 : 0.1680053174495697
Index 111 : 0.1669185906648636
Index 112 : 0.16684848070144653
Index 113 : 0.16438926756381989
Index 114 : 0.16455718874931335
Index 115 : 0.1647680401802063
Index 116 : 0.16489136219024658
Index 117 : 0.16405563056468964
Index 118 : 0.16235491633415222
Index 119 : 0.1630207598209381
Index 120 : 0.1625487059354782
Index 121 : 0.16416308283805847
Index 122 : 0.16108065843582153
Index 123 : 0.16164664924144745
Index 124 : 0.16370269656181335
Index 125 : 0.16323113441467285
Index 126 : 0.162475124001503
Index 127 : 0.15978407859802246
Index 128 : 0.1603734791278839
Index 129 : 0.1608683466911316
Index 130 : 0.15711955726146698
Index 131 : 0.15514495968818665
Index 132 : 0.15438158810138702
Index 133 : 0.15295284986495972
Index 134 : 0.15218976140022278
Index 135 : 0.151565819978714
Index 136 : 0.1495165079832077
Index 137 : 0.15253356099128723
Index 138 : 0.15231600403785706
Index 139 : 0.15198323130607605
Index 140 : 0.15227250754833221
Index 141 : 0.1531990021467209
Index 142 : 0.15380799770355225
Index 143 : 0.15116570889949799
Index 144 : 0.15025781095027924
Index 145 : 0.15048879384994507
Index 146 : 0.14884935319423676
Index 147 : 0.17412197589874268
Index 148 : 0.16910052299499512
Index 149 : 0.1681765466928482
Index 150 : 0.16596214473247528
Index 151 : 0.1649814397096634
Index 152 : 0.1641160547733307
Index 153 : 0.16402922570705414
Index 154 : 0.1618376523256302
Index 155 : 0.15940622985363007
Index 156 : 0.1595630794763565
Index 157 : 0.15751375257968903
Index 158 : 0.1581299901008606
Index 159 : 0.15863680839538574
Index 160 : 0.15741732716560364
Index 161 : 0.15688654780387878
Index 162 : 0.15663589537143707
Index 163 : 0.1536678522825241
Index 164 : 0.1506621241569519
Index 165 : 0.15042011439800262
Index 166 : 0.14912308752536774
Index 167 : 0.14985905587673187
Index 168 : 0.14700503647327423
Index 169 : 0.1476399004459381
Index 170 : 0.14566178619861603
Index 171 : 0.14340174198150635
Index 172 : 0.14179828763008118
Index 173 : 0.14100085198879242
Index 174 : 0.14016954600811005
Index 175 : 0.1385919600725174
Index 176 : 0.1381639540195465
Index 177 : 0.13688717782497406
Index 178 : 0.13773921132087708
Index 179 : 0.13580232858657837
Index 180 : 0.13455404341220856
Index 181 : 0.13365940749645233
Index 182 : 0.13127896189689636
Index 183 : 0.1298736333847046
Index 184 : 0.12892693281173706
Index 185 : 0.12756234407424927
Index 186 : 0.12827464938163757
Index 187 : 0.12666037678718567
Index 188 : 0.1267896443605423
Index 189 : 0.12822739779949188
Index 190 : 0.12558528780937195
Index 191 : 0.12391166388988495
Index 192 : 0.12362907826900482
Index 193 : 0.12385866791009903
Index 194 : 0.12379568815231323
Index 195 : 0.12335821241140366
Index 196 : 0.12285757064819336
Index 197 : 0.1215449869632721
In [56]:
# Map the position of the highest silhouette score back to its cluster count
best_n_clusters = range_clusters[np.argmax(scores)]
In [57]:
# k = 79 gave the highest silhouette score
best_n_clusters
Out[57]:
79
In [58]:
# Best silhouette score; scores[scores.index(max(scores))] is just max(scores)
best_n_scores = max(scores)
In [27]:
# Best silhouette score (obtained at k = 79)
best_n_scores
Out[27]:
0.18284288
In [59]:
# Same value looked up by index: offset 77 corresponds to k = 79 (range starts at 2)
scores[77]
Out[59]:
0.18284288
In [60]:
# Silhouette score as a function of the number of clusters k
plt.figure(figsize=(24, 8))
plt.plot(range_clusters, scores, "bo-")
plt.xlabel("$k$", fontsize=14)
plt.ylabel("Silhouette score", fontsize=14)
# Zoom in on k <= 120, where the curve peaks
plt.axis([1.5, 120, 0.05, 0.5])
plt.show()
In [61]:
# Reduce the dataset dimensionality according to the number of clusters that returned the highest silhouette score.
# Reuse the computed best_n_clusters (79 on this run) instead of a hard-coded magic number.
kmeans = KMeans(n_clusters=best_n_clusters, init="k-means++", n_init='auto', random_state=98)
In [62]:
# Transform each face into its vector of distances to the cluster centroids
X_reduced = kmeans.fit_transform(X)
In [63]:
# 400 samples x 79 centroid-distance features
X_reduced.shape
Out[63]:
(400, 79)
In [64]:
# Array with the first instance's distances to the 79 cluster centroids
X_reduced[0]
Out[64]:
array([11.988758 , 11.11365  , 15.363498 ,  9.444666 , 11.000386 ,
        5.4597178,  9.867196 ,  8.780561 , 12.209009 ,  9.978074 ,
       14.158886 , 14.16626  , 11.892452 ,  8.184852 ,  7.400579 ,
       11.166139 , 11.0718565, 12.919109 , 13.880829 , 11.196977 ,
        8.596896 , 11.290294 ,  9.71953  ,  9.962675 , 14.4639015,
       13.494672 , 13.580415 ,  9.469277 , 11.30324  , 11.732554 ,
       11.46953  ,  9.1103525, 10.009584 , 10.687915 , 10.7051325,
       10.389788 ,  9.519195 , 10.605962 , 10.743815 ,  9.234018 ,
       11.074283 , 13.077244 , 13.626669 ,  8.276454 , 14.109044 ,
       12.779579 , 13.597022 ,  9.647831 ,  8.340371 ,  6.6628537,
       11.615242 , 13.948507 ,  9.404628 ,  8.8362   , 13.465821 ,
        9.559306 , 11.116907 , 10.479551 , 11.788693 ,  9.9946785,
       11.711618 ,  9.873837 ,  9.338231 , 11.875794 , 13.702015 ,
        9.150525 ,  8.512709 , 16.701874 , 11.092509 , 12.574738 ,
        8.481403 , 11.284654 , 10.599734 ,  9.639815 ,  9.883467 ,
       12.434465 , 11.363939 ,  8.173353 ,  9.145894 ], dtype=float32)
In [65]:
# Split the reduced Olivetti dataset.
# Same test_size, random_state and stratify target as the raw-pixel split,
# so the row partitions should line up with the earlier splits.
X_train_reduced, X_temp_reduced, y_train, y_temp = train_test_split(X_reduced, y, test_size=0.3, random_state=98, stratify=y)

X_valid_reduced, X_test_reduced, y_valid, y_test = train_test_split(X_temp_reduced, y_temp, test_size=0.5, random_state=98, stratify=y_temp)

print(f"Training set size: {len(y_train)}")
print(f"Validation set size: {len(y_valid)}")
print(f"Test set size: {len(y_test)}")
Training set size: 280
Validation set size: 60
Test set size: 60
In [66]:
# Retrain the classifier on the 79-dimensional reduced features
svm_clf.fit(X_train_reduced, y_train)
Out[66]:
SVC(random_state=98)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SVC(random_state=98)
In [67]:
# Cross-validate the SVC on the reduced features
# NOTE: rebinds `scores` once more (it held the silhouette-score list before)
k = 5
scores = cross_val_score(svm_clf, X_train_reduced, y_train, cv=k, scoring='accuracy')

print(f"Cross-validation scores (k={k}):", scores)
print("Average cross-validation score:", scores.mean())
Cross-validation scores (k=5): [0.55357143 0.57142857 0.67857143 0.46428571 0.67857143]
Average cross-validation score: 0.5892857142857142
In [68]:
# Validation accuracy on the reduced features
y_pred_valid_reduced = svm_clf.predict(X_valid_reduced)
accuracy = accuracy_score(y_valid, y_pred_valid_reduced)
# f-prefix removed: the string has no placeholders (lint F541); output unchanged
print("Validation accuracy with kernel= rbf:", accuracy)
Validation accuracy with kernel= rbf: 0.65
In [69]:
# Predict test labels from the reduced features
y_pred_reduced = svm_clf.predict(X_test_reduced)
In [70]:
# Print the classification report for the reduced-feature model
print('\t\tClassification Report - SVC\n\n', classification_report(y_test, y_pred_reduced))
		Classification Report - SVC

               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      1.00      1.00         2
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         2
           4       1.00      0.50      0.67         2
           5       0.00      0.00      0.00         1
           6       0.67      1.00      0.80         2
           7       1.00      1.00      1.00         2
           8       0.33      1.00      0.50         1
           9       0.00      0.00      0.00         2
          10       0.50      1.00      0.67         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          13       0.50      1.00      0.67         1
          14       0.50      1.00      0.67         1
          15       0.00      0.00      0.00         2
          16       0.00      0.00      0.00         1
          17       0.33      1.00      0.50         1
          18       1.00      1.00      1.00         1
          19       1.00      1.00      1.00         1
          20       0.00      0.00      0.00         1
          21       1.00      1.00      1.00         2
          22       0.00      0.00      0.00         1
          23       1.00      1.00      1.00         2
          24       0.33      1.00      0.50         1
          25       1.00      1.00      1.00         2
          26       1.00      1.00      1.00         2
          27       1.00      1.00      1.00         2
          28       0.00      0.00      0.00         1
          29       0.67      1.00      0.80         2
          30       1.00      1.00      1.00         2
          31       1.00      0.50      0.67         2
          32       1.00      1.00      1.00         1
          33       1.00      1.00      1.00         2
          34       0.67      1.00      0.80         2
          35       1.00      0.50      0.67         2
          36       1.00      1.00      1.00         1
          37       0.00      0.00      0.00         1
          38       0.50      1.00      0.67         1
          39       0.00      0.00      0.00         2

    accuracy                           0.65        60
   macro avg       0.54      0.64      0.56        60
weighted avg       0.59      0.65      0.59        60

DBSCAN¶

In [71]:
# Back to the raw 400 x 4096 pixel matrix for DBSCAN
X.shape
Out[71]:
(400, 4096)
In [72]:
# Peek at the first ten rows of raw pixel intensities (float32)
X[:10]
Out[72]:
array([[0.08677686, 0.09917355, 0.11570248, ..., 0.2107438 , 0.3181818 ,
        0.49173555],
       [0.2603306 , 0.35123968, 0.43801653, ..., 0.25619835, 0.30991736,
        0.2892562 ],
       [0.10330579, 0.21900827, 0.17768595, ..., 0.43801653, 0.4752066 ,
        0.2768595 ],
       ...,
       [0.48347107, 0.446281  , 0.46280992, ..., 0.661157  , 0.6735537 ,
        0.6694215 ],
       [0.35123968, 0.4338843 , 0.553719  , ..., 0.1570248 , 0.2107438 ,
        0.20661157],
       [0.7107438 , 0.7107438 , 0.70247936, ..., 0.553719  , 0.30991736,
        0.30991736]], dtype=float32)
In [74]:
# Testing the distance and min_samples manually
# eps = 7.24 was chosen by hand for this 4096-dimensional pixel space
dbscan = DBSCAN(eps = 7.24, min_samples=2)
clusters = dbscan.fit_predict(X)
In [75]:
# Cluster labels found (-1 is DBSCAN's noise/outlier label)
print(np.unique(clusters))
[-1  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22
 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45]
In [76]:
# 47 unique labels = 46 clusters plus the -1 noise label
print(len(np.unique(clusters)))
47
In [77]:
# Outliers: number of samples DBSCAN labelled as noise (-1)
print(len(clusters[clusters == -1]))
34
In [78]:
# Rebuild 2-D images from the flat pixel rows
# NOTE: rebinds the global `images` defined earlier from dataset.images
images = X.reshape(-1, 64, 64)
In [79]:
# 400 images of 64x64 pixels again
images.shape
Out[79]:
(400, 64, 64)
In [80]:
# Group face images by their DBSCAN cluster label (-1 collects the outliers)
clustered_images = defaultdict(list)
for img, cluster_id in zip(images, clusters):
    clustered_images[cluster_id].append(img)
In [81]:
def display_images(images, title=""):
    """Display a list of images in a near-square grid under a shared title.

    Parameters
    ----------
    images : sequence of 2-D arrays (rendered with a gray colormap)
    title : str
        Figure-level suptitle.
    """
    n_images = len(images)
    if n_images == 0:
        return  # nothing to draw; the original raised ZeroDivisionError here
    rows = max(1, int(n_images**0.5))
    # Ceiling division guarantees rows * cols >= n_images, without the
    # over-allocation of the original (n // rows) + (n % rows) formula
    cols = -(-n_images // rows)

    plt.figure(figsize=(1.5*cols, 1.5*rows))
    for i in range(n_images):
        ax = plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i], cmap="gray")
        ax.axis('off')

    plt.suptitle(title)
    plt.show()
In [50]:
# Show each cluster's faces. The original loop variable was named `images`,
# which clobbered the global image array and broke re-runs of earlier cells;
# renamed to avoid the shadowing.
for cluster_id, cluster_faces in clustered_images.items():
    display_images(cluster_faces, title=f"Cluster {cluster_id}")